Updated 2021-08-19 13:59:05

Objective

  1. Develop a function to evaluate C-Q relationships that includes several different modeling approaches.

Models:

\(log(C) = \beta_{1} log(\alpha_1) + log(Q)\) for Q < Break point

\(log(C) = \beta_{2} log(\alpha_2) + log(Q)\) for Q > Break point


Breakpoints are determined through an iterative process and identified when the linear relationship changes. The model is estimated simultaneously yielding point estimates and relevant approximate standard errors of all the model parameters, including the break-points.

Function

# To check and install if needed packages
check.packages <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg)) 
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}

pkg=c("segmented","gvlma")
check.packages(pkg)
library(segmented)
library(gvlma)
#' Fitting models to Concentration-Discharge data using several different 
#' models/methods and extracting relevat information

#' @param logQ       log transformed paired discharge value
#' @param logC       log transformed paired concentration value
#' @param Q50        median value of daily discharge 
#' @param plot       default = TRUE; identifies if you want to plot results.
#' @param models     default = c("log-linear","segmented","moatar"); identified
#'                   what models to plot
#' @param plot.CI    default = TRUE; If the models are plotted a 95% confidence 
#'                   interval will also be plotted
#' @param CI.level   default = 0.95; confidence interval level
#' @param plot.lwd   default = 1.5; model line thickness
#' @param legend.pos default = "topleft"; legend position if models are plotted
#' @param print.rslt defualt = TRUE; print output table. 
#' @param ...        other plotting variables see ?plot (i.e. pch, lwd, bg, col, ylim, etc.)

CQ_fun=function(logQ,logC,Q50,plot=TRUE,
                models=c("log-linear","segmented","moatar"),
                plot.CI=TRUE,
                CI.level=0.95,
                plot.lwd=1.5,
                legend.pos="topleft",
                print.rslt=TRUE,
                xlab="Discharge",
                ylab="Concentration",...){
  
  # Power Law
  loglog.mod=lm(logC~logQ)
  loglog.mod.sum=summary(loglog.mod)
  
  # Check assumptions
  LL.assump=gvlma(loglog.mod)
  asumpt.rslt=as.character(ifelse(LL.assump$GlobalTest$GlobalStat4$pvalue<0.05,"No","Yes"))
  
  # variabled of interest
  loglog.beta=as.numeric(coef(loglog.mod)[2])
  loglog.alpha=as.numeric(coef(loglog.mod)[1])
  
  LL.rslt=data.frame(LL.R2=loglog.mod.sum$r.squared,
                     LL.R2.adj=loglog.mod.sum$adj.r.squared,
                     LL.RMSE=loglog.mod.sum$sigma,
                     LL.beta=loglog.beta,
                     LL.beta.LCI=as.numeric(confint(loglog.mod)[2,1]),
                     LL.beta.UCI=as.numeric(confint(loglog.mod)[2,2]),
                     LL.beta.SE=loglog.mod.sum$coefficients[2,2],
                     LL.beta.tval=loglog.mod.sum$coefficients[2,3],
                     LL.beta.pval=loglog.mod.sum$coefficients[2,4],
                     LL.alpha=loglog.alpha,
                     LL.AIC=AIC(loglog.mod),
                     LL.BIC=BIC(loglog.mod),
                     LL.LogLik=as.numeric(logLik(loglog.mod)),
                     LL.assumpt.pass=asumpt.rslt)
  
  # Segmented
  # Picks 1st breakpoint
  loglog.mod.seg=segmented(loglog.mod,seg.Z=~logQ,npsi=1)
  loglog.mod.seg.sum=summary(loglog.mod.seg)
  
  # is there a breakpoint?
  if(is.null(loglog.mod.seg$psi)){
    seg.rslt=data.frame(seg.bk.est=NA,
                        seg.bk.SE=NA,
                        seg.R2=NA,
                        seg.R2.adj=NA,
                        seg.RMSE=NA,
                        seg.AIC=NA,
                        seg.BIC=NA,
                        seg.LogLik=NA)
  }else{
    seg.psi.est=loglog.mod.seg$psi[1,c(2)]
    seg.psi.SE=seg.psi=loglog.mod.seg$psi[1,c(3)]
    seg.rslt=data.frame(seg.bk.est=exp(seg.psi.est),
                        seg.bk.SE=exp(seg.psi.SE),
                        seg.R2=loglog.mod.seg.sum$r.squared,
                        seg.R2.adj=loglog.mod.seg.sum$adj.r.squared,
                        seg.RMSE=loglog.mod.seg.sum$sigma,
                        seg.AIC=AIC(loglog.mod.seg),
                        seg.BIC=BIC(loglog.mod.seg),
                        seg.LogLik=as.numeric(logLik(loglog.mod.seg)))
  }
  
 
  
  # Moatar
  logQ.m=logQ[logQ<log(Q50)]
  logC.m=logC[logQ<log(Q50)]
  loglog.mod.inf=lm(logC.m~logQ.m)
  loglog.mod.inf.sum=summary(loglog.mod.inf)
  CQ50.inf=predict(loglog.mod.inf,data.frame(logQ.m=log(Q50)))
  
  logQ.m=logQ[logQ>log(Q50)]
  logC.m=logC[logQ>log(Q50)]
  loglog.mod.sup=lm(logC.m~logQ.m)
  loglog.mod.sup.sum=summary(loglog.mod.sup)
  CQ50.sup=predict(loglog.mod.sup,data.frame(logQ.m=log(Q50)))
  
  moatar.rslt=data.frame(Q50=Q50,
                         moatar.beta.inf=as.numeric(coef(loglog.mod.inf)[2]),
                         moatar.beta.inf.tval=loglog.mod.inf.sum$coefficients[2,3],
                         moatar.beta.inf.pval=loglog.mod.inf.sum$coefficients[2,4],
                         moatar.R2.inf=loglog.mod.inf.sum$r.squared,
                         moatar.R2.adj.inf=loglog.mod.inf.sum$adj.r.squared,
                         moatar.RMSE.inf=loglog.mod.inf.sum$sigma,
                         moatar.beta.sup=as.numeric(coef(loglog.mod.sup)[2]),
                         moatar.beta.sup.tval=loglog.mod.sup.sum$coefficients[2,3],
                         moatar.beta.sup.pval=loglog.mod.sup.sum$coefficients[2,4],
                         moatar.R2.sup=loglog.mod.sup.sum$r.squared,
                         moatar.R2.adj.sup=loglog.mod.sup.sum$adj.r.squared,
                         moatar.RMSE.sup=loglog.mod.sup.sum$sigma,
                         moatar.C_Q50=exp(mean(c(CQ50.inf,CQ50.sup))))
  
  # ratio of CV Conc and CV Discharge
  Cvc=(sd(exp(logC),na.rm=T)/mean(exp(logC),na.rm=T))*100
  Cvq=(sd(exp(logQ),na.rm=T)/mean(exp(logQ),na.rm=T))*100
  CVcq=data.frame(CVc_CVq=Cvc/Cvq)
  
  rslt.all=cbind(LL.rslt,seg.rslt,moatar.rslt,CVcq)
  
  # Plotting
  c.val=exp(logC)
  q.val=exp(logQ)
  if(plot==T){
    plot(c.val~q.val,log="xy",...)
    
    if(sum(match(models,"log-linear"),na.rm=T)==1){
      x.val=seq(min(logQ,na.rm=T),max(logQ,na.rm=T),length.out=70)
      LL.pred=predict(loglog.mod,data.frame(logQ=x.val),interval="confidence",
                      level=CI.level)
      lines(exp(x.val),exp(LL.pred[,1]),col="red",lwd=plot.lwd)
      if(plot.CI==T){
        lines(exp(x.val),exp(LL.pred[,2]),col="red",lty=2)
        lines(exp(x.val),exp(LL.pred[,3]),col="red",lty=2)
      }
    }
    
    if(is.null(loglog.mod.seg$psi)==F){
    if(sum(match(models,"segmented"),na.rm=T)==1){
      x.val=seq(min(logQ,na.rm=T),max(logQ,na.rm=T),length.out=70)
      seg.pred=predict(loglog.mod.seg,data.frame(logQ=x.val),
                       interval="confidence",level=CI.level)
      lines(exp(x.val),exp(seg.pred[,1]),col="blue",lwd=plot.lwd)
      if(plot.CI==T){
        lines(exp(x.val),exp(seg.pred[,2]),col="blue",lty=2)
        lines(exp(x.val),exp(seg.pred[,3]),col="blue",lty=2)
      }
    }
    }
    
    if(sum(match(models,"moatar"),na.rm=T)==1){
      abline(v=Q50)
      x.val=seq(min(logQ,na.rm=T),log(Q50),length.out=50)
      LL.inf=predict(loglog.mod.inf,data.frame(logQ.m=x.val),
                     interval="confidence",level=CI.level)          
      lines(exp(x.val),exp(LL.inf[,1]),col="forestgreen",lwd=plot.lwd)
      if(plot.CI==T){
        lines(exp(x.val),exp(LL.inf[,2]),col="forestgreen",lty=2)
        lines(exp(x.val),exp(LL.inf[,3]),col="forestgreen",lty=2)
      }
      
      x.val=seq(log(Q50),max(logQ,na.rm=T),length.out=50)
      LL.sup=predict(loglog.mod.sup,data.frame(logQ.m=x.val),
                     interval="confidence",level=CI.level)          
      lines(exp(x.val),exp(LL.sup[,1]),col="forestgreen")
      if(plot.CI==T){
        lines(exp(x.val),exp(LL.sup[,2]),col="forestgreen",lty=2)
        lines(exp(x.val),exp(LL.sup[,3]),col="forestgreen",lty=2)
      }
    }
    
    mod.col=c("red","blue","forestgreen")
    mod.col=mod.col[match(models,c("log-linear","segmented","moatar"))]
    legend.pos=if(is.na(legend.pos)==T){"topleft"}else(legend.pos)
    legend(legend.pos,
           legend=models,
           lty=1,
           col=mod.col,
           ncol=1,bty="n",y.intersp=1,x.intersp=0.75,
           xpd=NA,xjust=0.5,yjust=0.5,title="Model",title.adj = 0)
    
  }
  if(print.rslt==TRUE){return(rslt.all)}
}

Output includes measure of relative and absolute fit, slopes and intercepts and breakpoints as a data.frame.


Examples

UMR: M786.2

# read discharge data
dat.Q=read.csv("./Data/M786.2C_Q_WRTDS.csv")
dat.Q$Date=as.Date(dat.Q$Date)# identify date

# read concentration data
dat.C=read.csv("./Data/M786.2C_Si_WRTDS.csv")
dat.C$Date=as.Date(dat.C$Date)# identify date

# merge the C and Q data
dat.CQ=merge(dat.Q,dat.C,"Date")

Q50=median(dat.Q$Q,na.rm=T)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,pch=19,ylim=c(1,10),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)")
C-Q relationship of M786.2 (UMR; all data) and all models

C-Q relationship of M786.2 (UMR; all data) and all models

##          LL.R2    LL.R2.adj   LL.RMSE    LL.beta LL.beta.LCI LL.beta.UCI
## 1 0.0009164036 -0.001813333 0.3739276 0.01538679 -0.03683499  0.06760857
##   LL.beta.SE LL.beta.tval LL.beta.pval LL.alpha   LL.AIC   LL.BIC LL.LogLik
## 1 0.02655615     0.579406    0.5626717 1.560551 324.3351 336.0594 -159.1676
##   LL.assumpt.pass seg.bk.est seg.bk.SE     seg.R2 seg.R2.adj  seg.RMSE  seg.AIC
## 1              No   14900.55  1.334313 0.01539102 0.00727611 0.3722274 322.9646
##   seg.BIC seg.LogLik   Q50 moatar.beta.inf moatar.beta.inf.tval
## 1 342.505  -156.4823 18000      -0.2480301            -2.298768
##   moatar.beta.inf.pval moatar.R2.inf moatar.R2.adj.inf moatar.RMSE.inf
## 1            0.0229372    0.03492981        0.02831974       0.4274229
##   moatar.beta.sup moatar.beta.sup.tval moatar.beta.sup.pval moatar.R2.sup
## 1      0.02826071            0.6106936            0.5420415     0.0017157
##   moatar.R2.adj.sup moatar.RMSE.sup moatar.C_Q50   CVc_CVq
## 1      -0.002884689       0.3277656     5.130173 0.3462364

Here is an example of plotting each individual model without individual data points, and output suppressed.

par(mar=c(2,4,1,1),oma=c(3,1,1,1),mgp=c(2,1,0))
layout(matrix(1:3,3,1))
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,10),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)",
       type="n",
       models="log-linear",
       print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,10),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)",
       type="n",
       models="segmented",
       print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,10),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)",
       type="n",
       models="moatar",
       print.rslt=F,
       xpd=NA)
C-Q relationship of M786.2 (UMR; all data) and all models with individual data points removed from the plot

C-Q relationship of M786.2 (UMR; all data) and all models with individual data points removed from the plot

LUQ: MPR

# read discharge data
dat.Q=read.csv("./Data/MPR_Q_WRTDS.csv")
dat.Q$Date=as.Date(dat.Q$Date)# identify date

# read concentration data
dat.C=read.csv("./Data/MPR_Si_WRTDS.csv")
dat.C$Date=as.Date(dat.C$Date)# identify date

# merge the C and Q data
dat.CQ=merge(dat.Q,dat.C,"Date")

Q50=median(dat.Q$Q,na.rm=T)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,pch=19,ylim=c(1,50),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)",
       main="LUQ MPR")
C-Q relationship of MPR (LUQ; all data) and all models

C-Q relationship of MPR (LUQ; all data) and all models

##       LL.R2 LL.R2.adj   LL.RMSE    LL.beta LL.beta.LCI LL.beta.UCI LL.beta.SE
## 1 0.2607266 0.2601216 0.3397364 -0.2674487  -0.2927239  -0.2421736 0.01288294
##   LL.beta.tval LL.beta.pval LL.alpha   LL.AIC  LL.BIC LL.LogLik LL.assumpt.pass
## 1    -20.75992 3.084079e-82 3.928898 834.7354 850.065 -414.3677              No
##   seg.bk.est seg.bk.SE    seg.R2 seg.R2.adj  seg.RMSE  seg.AIC  seg.BIC
## 1   56.92265  1.222207 0.2709456  0.2691529 0.3376566 821.6979 847.2473
##   seg.LogLik Q50 moatar.beta.inf moatar.beta.inf.tval moatar.beta.inf.pval
## 1   -405.849  35      -0.2005908            -4.499335         8.188038e-06
##   moatar.R2.inf moatar.R2.adj.inf moatar.RMSE.inf moatar.beta.sup
## 1    0.03263879        0.03102652       0.3482593      -0.3427692
##   moatar.beta.sup.tval moatar.beta.sup.pval moatar.R2.sup moatar.R2.adj.sup
## 1            -15.74424         4.156418e-47     0.2892829         0.2881159
##   moatar.RMSE.sup moatar.C_Q50   CVc_CVq
## 1       0.3179681     20.55023 0.2016571
par(mar=c(2,4,1,1),oma=c(3,1,1,1),mgp=c(2,1,0))
layout(matrix(1:3,3,1))
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,50),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)",
       type="n",
       models="log-linear",
       print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,50),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)",
       type="n",
       models="segmented",
       print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,50),
       legend.pos = "bottomright",
       ylab="Si Concentration (mg/L)",
       xlab="Discharge (cfs)",
       type="n",
       models="moatar",
       print.rslt=F,
       xpd=NA)
C-Q relationship of MPR (LUQ; all data) and all models with individual data points removed from the plot

C-Q relationship of MPR (LUQ; all data) and all models with individual data points removed from the plot


Paired Si Concentration - Discharge

dat=read.csv(paste0("./Data/Si_Q_WRTDS_sites.csv"))
C-Q relationship of all sites with available data.

C-Q relationship of all sites with available data.

site.vals=unique(dat$site.name)

CQ.rslt=data.frame()
for(i in 1:length(site.vals)){
  tmp=subset(dat,site.name==site.vals[i])
  tmp$logQ=log(tmp$Q+1)
  tmp$logC=log(tmp$Si)
 
  rslt=with(tmp,CQ_fun(logQ,logC,median(Q+1,na.rm=T),plot=F))
  rslt$SITE=site.vals[i]
  rslt$LTER=unique(tmp$LTER)
  CQ.rslt=rbind(CQ.rslt,rslt)
}

head(CQ.rslt,2L)
##          LL.R2    LL.R2.adj   LL.RMSE     LL.beta LL.beta.LCI LL.beta.UCI
## 1 0.0005475968 -0.000491335 0.5144224 -0.01141096 -0.04225563  0.01943371
## 2 0.1508349862  0.144681617 0.7198764 -0.20764344 -0.29057065 -0.12471624
##   LL.beta.SE LL.beta.tval LL.beta.pval    LL.alpha    LL.AIC    LL.BIC
## 1 0.01571757   -0.7260005 4.680151e-01  0.08194892 1458.1495 1472.7628
## 2 0.04193954   -4.9510184 2.120389e-06 -0.62016070  309.2592  318.0841
##   LL.LogLik LL.assumpt.pass  seg.bk.est seg.bk.SE      seg.R2    seg.R2.adj
## 1 -726.0747              No 62217.95768  1.521093 0.002792477 -0.0003237965
## 2 -151.6296             Yes     1.17013  1.165232 0.235717577  0.2188584058
##    seg.RMSE   seg.AIC   seg.BIC seg.LogLik     Q50 moatar.beta.inf
## 1 0.5143793 1459.9818 1484.3372  -724.9909 12810.0     -0.00047434
## 2 0.6879532  298.5149  313.2231  -144.2574    10.9     -0.02867711
##   moatar.beta.inf.tval moatar.beta.inf.pval moatar.R2.inf moatar.R2.adj.inf
## 1          -0.01338789            0.9893239  3.734076e-07      -0.002082959
## 2          -0.20704248            0.8366046  6.393908e-04      -0.014276439
##   moatar.RMSE.inf moatar.beta.sup moatar.beta.sup.tval moatar.beta.sup.pval
## 1       0.4781620      0.03248396            0.7249219           0.46885319
## 2       0.8861889     -0.17356137           -2.3325204           0.02268602
##   moatar.R2.sup moatar.R2.adj.sup moatar.RMSE.sup moatar.C_Q50   CVc_CVq
## 1   0.001093619     -0.0009874362       0.5485169    0.9618526 0.4177392
## 2   0.075104949      0.0613005449       0.4827239    0.3607766 0.6118868
##                   SITE LTER
## 1               ALBION  NWT
## 2 Andersen Creek at H1  MCM



Download CQ.rslt as Excel file



Exploratory Plots

Log-Linear Slope for each site.

The ratio CV concentration and CV discharge volume for each site.